# Cleans the results from fitting PLANT elec gen model
library(pacman)
p_load(
  here, fst, data.table, purrr, janitor, ggplot2, lubridate, stringr,
  scales
)

# Regional load data
nerc_load_dt = 
  read.fst(
    path = here("Data/electricity-generation/clean-load/nerc-load-dt-oge.fst"),
    as.data.table = TRUE
  )
# Plant info table
plant_info_dt = 
  rbind(
    read.fst(
      path = here("Data/electricity-generation/plant-info-dt-oge.fst"),
      as.data.table = TRUE
    ),
    read.fst(
      path = here("Data/electricity-generation/shaped-info-dt-oge.fst"),
      as.data.table = TRUE
    ),
    fill = TRUE
  )|>
  setkey(plant_id_eia, year)
# Reading the electricity generation files (with fitted values)
elec_gen_dt=
  read.fst(
    path = here("Data/electricity-generation/plant-model-fit/elec-gen-fit-dt.fst"),
    as.data.table = TRUE
  )[,year := year(datetime_utc)] |>
  merge(
    plant_info_dt,
    by = c('plant_id_eia','year')
  )
# Adding IC to data 
elec_gen_dt[,
  ic := factor(fcase(
        nerc_adj %in% c('CAL','WECC'), 'West',
        nerc_adj == 'TRE', 'Texas',
        default = 'East'
      ), levels = c('East','West','Texas'))
]

# Summarizing production by region
nerc_gen_dt = 
  elec_gen_dt[,.(
    tot_net_generation_mwh = sum(net_generation_mwh),
    tot_fit_net_generation_mwh = sum(fit_net_generation_mwh)), 
    keyby = .(nerc_adj, datetime_utc)
  ]
# Summarizing production by interconnect
ic_gen_dt = 
  elec_gen_dt[,.(
    tot_net_generation_mwh = sum(net_generation_mwh),
    tot_fit_net_generation_mwh = sum(fit_net_generation_mwh)), 
    keyby = .(ic, datetime_utc)
  ]

# First looking at marginal damages 
tot_md_dt = 
  fread(
    here('Data/electricity-generation/output/damage-per-mwh-oge.csv')
  ) |>
  melt(
    id.vars = 'plant_id_eia',
    measure.vars = c('md_per_mwh_high','md_per_mwh_low')
  ) |>
  merge(
    oge_plant_info_unique_dt[,.(
      plant_id_eia, 
      nerc_adj,
      stack_height, 
      fuel_category = 
        fcase(
          fuel_category == 'natural_gas', 'Natural Gas',
          fuel_category == 'coal', 'Coal',
          #fuel_category == 'hydro', 'Hydro',
          #fuel_category == 'geothermal', 'Geothermal',
          #fuel_category == 'nuclear', 'Nuclear',
          default = 'Other'
        ) |>
        factor(
          levels = c('Natural Gas','Coal', 'Nuclear','Hydro','Geothermal','Other')
        )
    )],
    by = 'plant_id_eia'
  ) 

damages_hist = 
  ggplot(tot_md_dt) +
  geom_histogram(
    aes(x = value/1000, fill = fuel_category), 
    alpha= 0.5, 
    bins = 100,
    position = 'Identity'
  ) +
  scale_x_log10(labels = label_dollar(accuracy = 0.001)) + #labels = scales::label_dollar()
  theme_minimal() + 
  labs(
    x = 'Marginal Damage ($/KWh)',  
    y = 'Number of Power Plants'
  )+   
  scale_fill_brewer(
    name = 'Fuel Category',
    palette = 'Dark2'
  ) +
  facet_wrap(
    ~ifelse(variable == 'md_per_mwh_high', 'Above median production','Below median production'),
    scales = 'free_y',
    nrow = 2, ncol = 1
  ) + 
  theme(legend.position = 'bottom')
damages_hist
ggsave(
  plot = damages_hist,
  filename = 'figures/electricity-generation/damages-hist-oge.jpeg',
  width = 6, height = 5,
  bg = 'white'
)  


# Simple predicted vs actual with smoothing -----------------------------------
  ic_gen_dt |> 
  dplyr::group_by(ic)|>
  yardstick::rsq(
    truth = tot_net_generation_mwh, 
    estimate = tot_fit_net_generation_mwh
  )
  # For the interconnect
  model_fit_interconnect_p = 
    ggplot(
      ic_gen_dt, 
      aes(
        x = tot_net_generation_mwh/1e3, 
        y = tot_fit_net_generation_mwh/1e3
      )
    ) + 
    geom_point(alpha = 0.01, color = 'gray30', shape = 19) +
    geom_smooth() +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    facet_wrap(
      ~ic,
      scales = "free"
    )+ 
    labs(
      x = "Actual Production (GWh)",
      y = "Predicted Production (GWh)"
    ) + 
    theme_minimal() 
  ggsave(
    plot = model_fit_interconnect_p, 
    filename = here('figures/electricity-generation/model-fit-interconnect.jpeg'),
    bg = 'white',
    width = 8, height = 3, units = 'in'
  )
  ggsave(
    plot = model_fit_interconnect_p, 
    filename = here('figures/electricity-generation/model-fit-interconnect.pdf'),
    device = cairo_pdf,
    width = 8, height = 3, units = 'in'
  )
  model_fit_region_p = 
    ggplot(
      nerc_gen_dt[nerc_adj !='TRE'], 
      aes(
        x = tot_net_generation_mwh/1e3, 
        y = tot_fit_net_generation_mwh/1e3
      )
    ) + 
    geom_point(alpha = 0.01, color = 'gray30', shape = 19) +
    geom_smooth() +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    facet_wrap(
      ~nerc_adj,
      scales = "free"
    )+ 
    labs(
      x = "Actual Production (GWh)",
      y = "Predicted Production (GWh)"
    ) + 
    theme_minimal() 
  ggsave(
    plot = model_fit_region_p, 
    filename = here('figures/electricity-generation/model-fit-region.jpeg'),
    bg = 'white',
    width = 8, height = 5, units = 'in'
  )
  ggsave(
    plot = model_fit_region_p, 
    filename = here('figures/electricity-generation/model-fit-region.pdf'),
    device = cairo_pdf,
    width = 8, height = 5, units = 'in'
  )

# Predicted vs actual by hour and season --------------------------------------
  ic_gen_dt[,season:=fcase(
    month(datetime_utc) %in% 3:5, 'Spring',
    month(datetime_utc) %in% 6:8, 'Summer',
    month(datetime_utc) %in% 9:11, 'Fall',
    default = 'Winter'
  )]
  ic_gen_dt[,season:=factor(season, levels = c('Winter','Spring','Summer','Fall'))]
  # Giving an example week 
  model_fit_ic_hour_season_p =
    ic_gen_dt[,.(
      Actual = mean(tot_net_generation_mwh),
      Predicted = mean(tot_fit_net_generation_mwh)), 
      by = .(ic, hour = hour(datetime_utc),season 
    )] |>
    melt(
      #ic_gen_dt,
      id.vars = c('ic','hour','season')
    )|>
    ggplot(aes(x = hour, y = value/1e3, color = variable, linetype = variable)) +
    #geom_smooth(se = FALSE) +
    geom_line(size = 1.1) + 
    facet_grid(
      cols = vars(season), 
      rows = vars(ic), 
      scales = "free"
    ) +
    theme_minimal() + 
    scale_color_brewer(
      palette = "Dark2", 
      labels = c('Actual','Predicted')
    ) + 
    scale_linetype_manual(
      values = c('solid','twodash'), 
      labels = c('Actual','Predicted')
    )+
    labs(
      x = "Hour (UTC)", 
      y = "Electricity Production (GWh)",
      linetype = '',color = ''
    ) + 
    theme(legend.position = 'bottom')
  model_fit_ic_hour_season_p
  ggsave(
    plot = model_fit_ic_hour_season_p,
    filename = here("figures/electricity-generation/model-fit-ic-hour-season.jpeg"),
    bg = 'white',
    width = 8, height = 6
  )
  ggsave(
    plot = model_fit_ic_hour_season_p,
    filename = here("figures/electricity-generation/model-fit-ic-hour-season.pdf"),
    device = cairo_pdf,
    width = 8, height = 6
  )
  # Doing the same for regions now 
  nerc_gen_dt[,season:=fcase(
    month(datetime_utc) %in% 3:5, 'Spring',
    month(datetime_utc) %in% 6:8, 'Summer',
    month(datetime_utc) %in% 9:11, 'Fall',
    default = 'Winter'
  )]
  nerc_gen_dt[,season:=factor(season, levels = c('Winter','Spring','Summer','Fall'))]
  model_fit_region_hour_season_p =
    nerc_gen_dt[nerc_adj !='TRE',.(
      Actual = mean(tot_net_generation_mwh),
      Predicted = mean(tot_fit_net_generation_mwh)), 
      by = .(nerc_adj, hour = hour(datetime_utc),season 
      )] |>
    melt(
      id.vars = c('nerc_adj','hour','season')
    )|>
    ggplot(aes(x = hour, y = value/1e3, color = variable, linetype = variable)) +
    geom_line(size = 1.1) + 
    facet_grid(
      cols = vars(season), 
      rows = vars(nerc_adj), 
      scales = "free"
    ) +
    theme_minimal() + 
    scale_color_brewer(
      palette = "Dark2", 
      labels = c('Actual','Predicted')
    ) + 
    scale_linetype_manual(
      values = c('solid','twodash'), 
      labels = c('Actual','Predicted')
    )+
    labs(
      x = "Hour (UTC)", 
      y = "Electricity Production (GWh)",
      linetype = '',color = ''
    ) + 
    theme(legend.position = 'bottom')
  model_fit_region_hour_season_p
  ggsave(
    plot = model_fit_region_hour_season_p,
    filename = here("figures/electricity-generation/model-fit-region-hour-season.jpeg"),
    bg = 'white',
    width = 8, height = 9
  )
  ggsave(
    plot = model_fit_region_hour_season_p,
    filename = here("figures/electricity-generation/model-fit-region-hour-season.pdf"),
    device = cairo_pdf,
    width = 8, height = 9
  )

# Fuel mix vs load for each interconnection
  ic_fuel_gen_p = 
    elec_gen_dt[,.(
      tot_net_generation_mwh = sum(net_generation_mwh),
      tot_fit_net_generation_mwh = sum(fit_net_generation_mwh)), 
      keyby = .(
        ic =
          fcase(
            nerc_adj %in% c('CAL','WECC'), 'West',
            nerc_adj == 'TRE', 'Texas',
            default = 'East'
          ) |>
          factor(levels = c('East','West','Texas')),
        fuel_category = 
          fcase(
            fuel_category == 'natural_gas', 'Natural Gas',
            fuel_category == 'coal', 'Coal',
            fuel_category == 'hydro', 'Hydro',
            fuel_category == 'nuclear', 'Nuclear',
            #fuel_category == 'geothermal', 'Geothermal',
            default = 'Other'
          ) |>
          factor(levels = c('Natural Gas','Coal', 'Nuclear','Hydro','Other')), 
        datetime_utc
      )
    ][,':='(
      ic_tot_net_generation_mwh = sum(tot_net_generation_mwh),
      pct_gen_actual = tot_net_generation_mwh/sum(tot_net_generation_mwh), 
      pct_gen_predicted = tot_fit_net_generation_mwh/sum(tot_fit_net_generation_mwh)), 
      by = .(ic, datetime_utc)
    ] |>
    melt(
      id.vars = c('ic','fuel_category','datetime_utc','ic_tot_net_generation_mwh'),
      measure = patterns('pct_gen')
    ) |>
    ggplot(aes(
      x = ic_tot_net_generation_mwh/1e3, 
      y = value, 
      linetype = variable,
      color = fuel_category, 
    )) + 
    #geom_point(alpha = 0.2) +
    geom_smooth(se = FALSE) + 
    scale_color_brewer(name = 'Fuel Category',palette = 'Dark2') + 
    scale_y_continuous(labels = scales::label_percent()) +
    labs(
      x = 'Excess Load (GWh)',
      y = 'Percent of Electricity Production'
    ) + 
    scale_linetype_manual(
      name = 'Data',
      labels = c('Actual','Predicted'),
      values = c('solid','twodash')
    ) +
    facet_wrap(~ic, scales = 'free_x')+ 
    guides(
      linetype=guide_legend(keywidth = 4, keyheight = 1, override.aes = list(color = 'black')),
      colour=guide_legend(keywidth = 4, keyheight = 1)
    ) + 
    theme_minimal() + 
    theme(
      legend.position = 'bottom',
      legend.box = 'vertical'
    ) 
  ic_fuel_gen_p
  ggsave(
    plot = ic_fuel_gen_p,
    filename = here("figures/electricity-generation/model-fit-ic-fuel-mix.jpeg"),
    bg = 'white',
    width = 11, height = 5
  )
  ggsave(
    plot = ic_fuel_gen_p,
    filename = here("figures/electricity-generation/model-fit-ic-fuel-mix.pdf"),
    device = cairo_pdf,
    width = 11, height = 5
  )
# Total production hours predicted vs actual 
plant_hours_p =
    elec_gen_dt[,.(
      Actual = mean(net_generation_mwh > 0),
      Predicted = mean(fit_net_generation_mwh > 0),
      tot_mwh = sum(net_generation_mwh)), 
      by = .(plant_id_eia, ic)
    ] |>
    ggplot(aes(x = Actual, y = Predicted, size = tot_mwh, weight = tot_mwh)) + 
    geom_point(alpha = 0.3, color = 'gray30', shape = 19) +
    geom_smooth() +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    # facet_wrap(
    #   ~ic,
    #   scales = "free"
    # )+ 
    scale_x_continuous(
      name = 'Actual Production Hours', 
      labels = scales::label_percent()
    )+ 
    scale_y_continuous(
      name = 'Predicted Production Hours', 
      labels = scales::label_percent()
    ) + 
    theme_minimal() +
    guides(size = 'none')
  plant_hours_p
  ggsave(
    plot = plant_hours_p,
    filename = here("figures/electricity-generation/model-fit-plant-hour.jpeg"),
    bg = 'white',
    width = 8, height = 6
  )
  ggsave(
    plot = plant_hours_p,
    filename = here("figures/electricity-generation/model-fit-plant-hour.pdf"),
    device = cairo_pdf,
    width = 8, height = 6
  )



#ex_production_month_hr_p = 
#  rbind(
#    elec_gen_dt[orispl_code == '10' & unitid == "CT10",.(datetime_utc, gload_mw_tot, type = "Actual")],
#    elec_gen_dt[orispl_code == '10' & unitid == "CT10",.(datetime_utc, gload_mw_tot = fit_gload, type = #"Predicted")]
#  ) |>
#  ggplot(aes(x = hour(datetime_utc), y = gload_mw_tot, color = type)) + 
#  geom_jitter(alpha = 0.05)+
#  geom_smooth(se = FALSE) + 
#  scale_color_viridis_d(
#    name = "", begin= 0.25, end = 0.75,option = "magma"
#  ) +
#  facet_wrap(~month(datetime_utc, label = TRUE)) + 
#  labs(
#    x = "Hour (UTC)",
#    y = "Electricity Production (MW)"
#  ) +
#  ylim(200,850) +
#  theme_minimal()
#ggsave(
#  plot = ex_production_month_hr_p,
#  filename = here("figures/electricity-generation/example-fitted-hr-generation.pdf"),
#  device = cairo_pdf,
#  width = 8, height = 6
#)

production_month_hr_p = 
  rbind(
    elec_gen_dt[
      nerc_adj == 'RFC',.(
        datetime_utc, 
        net_generation_mwh, 
        type = "Actual"
    )],
    elec_gen_dt[
      nerc_adj == 'RFC',.(
        datetime_utc, 
        net_generation_mwh = fit_net_generation_mwh, 
        type = "Predicted"
    )]
  ) |>
  ggplot(aes(x = hour(datetime_utc), y = net_generation_mwh, color = type)) + 
  #geom_jitter(alpha = 0.05)+
  geom_smooth(se = FALSE) + 
  scale_color_viridis_d(
    name = "", begin= 0.25, end = 0.75,option = "magma"
  ) +
  facet_wrap(~month(datetime_utc, label = TRUE)) + 
  #facet_wrap(~region) + 
  labs(
    x = "Hour (UTC)",
    y = "Electricity Production (MWh)"
  ) +
  theme_minimal()
ggsave(
  plot = production_month_hr_p,
  filename = here("figures/electricity-generation/fitted-hr-generation-oge-rfc.pdf"),
  device = cairo_pdf,
  width = 8, height = 6
)


# How does excess load compare to aggregate load in each BA  
load_vs_gload = 
  rbind(
    nerc_load_dt[,.(datetime_utc, nerc_adj, type = "Solar", load = load_solar)],
    nerc_load_dt[,.(datetime_utc, nerc_adj, type = "Wind", load = load_nd-load_solar)],
    nerc_load_dt[,.(datetime_utc, nerc_adj, type = "Hydro", load = load_hydro)],
    nerc_load_dt[,.(datetime_utc, nerc_adj, type = "Load", load)],
    nerc_load_dt[,.(datetime_utc, nerc_adj, type = "Excess Load", load = excess_load)],
    nerc_load_dt[,.(datetime_utc, nerc_adj, type = "Fossil Fuels", load = load_ff)],
    nerc_gen_dt[,.(datetime_utc, nerc_adj, type = "CEMS Actual", load = tot_net_generation_mwh)],
    nerc_gen_dt[,.(datetime_utc, nerc_adj, type = "CEMS Predicted", load = tot_fit_net_generation_mwh)]
  )

load_region_p =
  load_vs_gload[
    #!(nerc_adj %in%c('AK','ASCC','HI')) & 
    #  !str_detect(type,"CEMS") 
    nerc_adj == 'RFC'
    ] |>
  ggplot(aes(x = hour(datetime_utc), y = load, color = type)) +
  geom_smooth(se = FALSE) +
  facet_wrap(~nerc_adj, scales = "free") +
  theme_minimal() + 
  scale_color_brewer("", palette = "Dark2") + 
  labs(x = "Hour (UTC)", y = "Load (MWh)") + 
  theme(legend.position = 'bottom')
ggsave(
  plot = load_region_p,
  filename = here("figures/electricity-generation/fitted-load-region-oge.pdf"),
  device = cairo_pdf,
  width = 8, height = 6
)

# Giving an example week 
load_week_example_p =
  load_vs_gload[
    !is.na(nerc_adj) 
    & week(datetime_utc) == 40
    & year(datetime_utc) == 2019
    & str_detect(type,"CEMS"),
    .(load = sum(load)),
    by = .(nerc_adj, datetime_utc, type)
  ] |>
  ggplot(aes(x = datetime_utc, y = load, color = type)) +
  geom_line() +
  facet_wrap(~nerc_adj, scales = "free") +
  theme_minimal() + 
  scale_color_brewer("", palette = "Dark2") + 
  labs(x = "Time (UTC)", y = "Load (MWh)") + 
  theme(legend.position = 'bottom')
ggsave(
  plot = load_week_example_p,
  filename = here("figures/electricity-generation/load-total-ex-week-oge.pdf"),
  device = cairo_pdf,
  width = 8, height = 6
)



